################################################################################-
# Replication File for Wratil, Wäckerle and Proksch: Government Rhetoric and the 
# Representation of Public Opinion in International Negotiations
#
# This script runs the STM models with alternative public opinion effects 
# in Appendix K. 
#
# Many parts of this script take a significant amount of time to run.
# These calculations can be skipped by leaving the settings below at "no".
# If you want to include these steps, switch the respective settings to "yes".
# The analyses in Appendix K are run with 2,500 simulations, while the main analysis
# was run with 10,000. This can be changed in the settings.
#
# Make sure you have the packages reshape, stargazer, xtable and ggpubr
# installed (the script calls reshape::melt, not reshape2).
#
# Additionally, the script produces the following graphs and tables:
# Figures K1 to K8 (except for K4)
# Tables K1 to K9
################################################################################-

#### Set Up and Load Data ####
library(tidyverse)          #version 1.3.2
library(quanteda)           #version 3.2.1
library(quanteda.textstats) #version 0.95
library(stm)                #version 1.3.6
library(reshape)            #version 0.8.9

# Shared ggplot2 theme for the coefficient plots: bold title, no legend, black
# axis text, small axis titles, horizontal bold facet labels on a white strip,
# no axis ticks, no panel background and no horizontal gap between panels.
theme_interaction <- theme(
  plot.title       = element_text(face = "bold"),
  legend.position  = "none",
  axis.text        = element_text(colour = "black"),
  axis.title       = element_text(size = 8, colour = "black"),
  strip.text.y     = element_text(angle = 0, face = "bold"),
  strip.background = element_rect(fill = "white"),
  axis.ticks       = element_blank(),
  panel.background = element_blank(),
  panel.spacing.x  = unit(0, "line")
)

# Run the project's helper script. Functions used below but not defined in this
# file (e.g. estimateEffect_cluster, summary.estimateEffect_comb,
# calc_interaction_regression_stm) presumably come from here - TODO confirm.
source("99_functions.R")

# Load the saved workspace objects for the topic models; the code below expects
# this file to provide the object `data_dfm`.
load("generated_data/dfm_for_stm.RData")

#### Settings ####

# Number of simulations, passed as `nsims` to estimateEffect_cluster() below.
n_simulation <- 2500

# Switches for the expensive steps of each alternative model. In every guarded
# step of this script, "yes" re-runs the step and saves its result under
# generated_data/, while any other value loads the previously saved result.

# Alternative 1: squared public image
run_a1_squared_stm            <- "no"
run_a1_squared_estimateEffect <- "no"
run_a1_squared_regressions    <- "no"

# Alternative 2: controlling for distance to the election
run_a2_control_distance_stm            <- "no"
run_a2_control_distance_estimateEffect <- "no"
run_a2_control_distance_regressions    <- "no"

# Alternative 3: distance (final two months) x public image interaction
run_a3_distance_interaction_stm            <- "no"
run_a3_distance_interaction_estimateEffect <- "no"
run_a3_distance_interaction_regressions    <- "no"

# Alternative 4: linear distance to election x public image interaction
run_a4_distance_linear_stm            <- "no"
run_a4_distance_linear_estimateEffect <- "no"
run_a4_distance_linear_regressions    <- "no"

# Alternative 5 (bonds)
run_a5_bonds_stm            <- "no"
run_a5_bonds_estimateEffect <- "no"
run_a5_bonds_regressions    <- "no"

# Alternative 6 (negotiation)
run_a6_negotiation_stm            <- "no"
run_a6_negotiation_estimateEffect <- "no"
run_a6_negotiation_regressions    <- "no"

# Alternative 7 (length)
run_a7_length_stm            <- "no"
run_a7_length_estimateEffect <- "no"
run_a7_length_regressions    <- "no"

# Alternative 8 (size)
run_a8_size_stm            <- "no"
run_a8_size_estimateEffect <- "no"
run_a8_size_regressions    <- "no"

#######################################-
#### Alternative 1: Squared Public Image ####
#######################################-

# Work on a copy of the main dfm whose docvars are reduced to the variables
# needed for this alternative; documents with a missing value in any of these
# variables are then dropped.
data_dfm_a1 <- data_dfm
docvars(data_dfm_a1) <- select(
  data.frame(docvars(data_dfm_a1)),
  gov_eu_supporter, image_lag6m_scaled, date_correct, Transcription,
  part_of_speech, text_copy, text_original, Actor,
  gov_lr_cmp_static_scaled, eu_receipts_gdp_scaled, budget_any, unanimity_any,
  unemployment_scaled, inflation_scaled, north_south,
  Negotiation_Stage, Council_Config_final
)
data_dfm_a1 <- dfm_subset(
  data_dfm_a1,
  complete.cases(docvars(data_dfm_a1))
)

# Fit the 40-topic STM whose prevalence model contains a second-degree
# polynomial of image_lag6m_scaled and cache it on disk, or load the cached fit.
if (run_a1_squared_stm == "yes") {
  set.seed(1711)
  stm_out_a1 <- stm(
    documents = data_dfm_a1,
    K = 40,
    prevalence = ~ poly(image_lag6m_scaled, 2) +
      part_of_speech + gov_eu_supporter +
      gov_lr_cmp_static_scaled + eu_receipts_gdp_scaled +
      budget_any + unanimity_any +
      unemployment_scaled + inflation_scaled + north_south +
      Negotiation_Stage + Council_Config_final,
    data = docvars(data_dfm_a1),
    init.type = "Spectral"
  )

  save(stm_out_a1, file = "generated_data/stm_out_a1.RData")
} else {
  load(file = "generated_data/stm_out_a1.RData")
}

#### ...estimateEffect ####

# Estimate the covariate effects on the prevalence of topics 12, 13, 22, 32, 38
# and 40 with the same right-hand side as the a1 STM prevalence formula and
# n_simulation simulations, then cache the result; otherwise load the cache.
# NOTE(review): estimateEffect_cluster() is not defined in this file
# (presumably in 99_functions.R); check its clustering behaviour there.
if(run_a1_squared_estimateEffect=="yes"){
  prep_council_a1 <- estimateEffect_cluster(c(12,13,22,32,38,40) ~ poly(image_lag6m_scaled,2)+
                                              part_of_speech+gov_eu_supporter+
                                              gov_lr_cmp_static_scaled+eu_receipts_gdp_scaled+budget_any+unanimity_any+
                                              unemployment_scaled+inflation_scaled+north_south+
                                              Negotiation_Stage+Council_Config_final,
                                            stm_out_a1,nsims = n_simulation,
                                            meta = docvars(data_dfm_a1), 
                                            uncertainty = "Global")
  
  save(file="generated_data/prep_council_a1.RData",prep_council_a1)
}else{
  load(file="generated_data/prep_council_a1.RData")
}

#### ...regressions ####
# Summarise the a1 effect estimates one topic at a time, keeping the first
# element of each summary, and cache them; otherwise load the cached summaries.
if (run_a1_squared_regressions == "yes") {
  mod_top_12 <- summary.estimateEffect_comb(prep_council_a1, topics = 12)[[1]]
  mod_top_13 <- summary.estimateEffect_comb(prep_council_a1, topics = 13)[[1]]
  mod_top_22 <- summary.estimateEffect_comb(prep_council_a1, topics = 22)[[1]]
  mod_top_32 <- summary.estimateEffect_comb(prep_council_a1, topics = 32)[[1]]
  mod_top_38 <- summary.estimateEffect_comb(prep_council_a1, topics = 38)[[1]]
  mod_top_40 <- summary.estimateEffect_comb(prep_council_a1, topics = 40)[[1]]
  save(
    mod_top_12, mod_top_13, mod_top_22,
    mod_top_32, mod_top_38, mod_top_40,
    file = "generated_data/regression_results_a1.RData"
  )
} else {
  print("Loading saved regression interaction")
  load(file = "generated_data/regression_results_a1.RData")
}

# Placeholder model for stargazer: an lm() with the same right-hand side as the
# a1 effect model is fitted to a random outcome (fake_theta). It only supplies
# a model object with the right terms; the coefficients and standard errors
# shown in the table are overridden via `coef` and `se` below.
data.sum_a1 <- docvars(data_dfm_a1)
data.sum_a1$fake_theta <- runif(n = nrow(data.sum_a1),min = 0,max = 1)
m1 <- lm(fake_theta ~ poly(image_lag6m_scaled,2)+
                  part_of_speech+gov_eu_supporter+
                  gov_lr_cmp_static_scaled+eu_receipts_gdp_scaled+budget_any+unanimity_any+
                  unemployment_scaled+inflation_scaled+north_south+
                  Negotiation_Stage+Council_Config_final,
                data = data.sum_a1)

# This is Table K1
# One column per topic. Column 1 of each topic's first summary table is passed
# as the coefficients and column 2 as the standard errors. covariate.labels are
# matched by position, so they must follow the term order of the formula above
# (intercept first, because intercept.bottom = FALSE).
stargazer::stargazer(m1,m1,m1,
                     m1,m1,m1,
                    #    type = "text",
                     dep.var.labels   = "Topic Prevalence",
                     column.labels = c("Topic 12","Topic 13","Topic 22",
                                       "Topic 32","Topic 38","Topic 40"),
                     covariate.labels = c("Intercept","Public Image of the EU","Public Image of the EU (Squared)", "Middle Part of Speech",
                                          "End Part of Speech","Eurosceptic Government", "Government Left-Right position", 
                                          "Net receipts from EU budget",
                                          "Budget issue","Unanimity Required","Unemployment Rate","Inflation Rate",
                                          "Northern Europe","Southern Europe","Negotiation stage: Initial Presentation",
                                          "Negotiation stage: Mixed Negotiations","Negotiation stage: Policy Debates",
                                          "Council configuration: Ecofin","Council configuration: EPSCO",
                                          "Council configuration: ENV","Council configuration: JHA"),
                     coef = list(mod_top_12$tables[[1]][,1],
                                 mod_top_13$tables[[1]][,1],
                                 mod_top_22$tables[[1]][,1],
                                 mod_top_32$tables[[1]][,1],
                                 mod_top_38$tables[[1]][,1],
                                 mod_top_40$tables[[1]][,1]),
                     se = list(mod_top_12$tables[[1]][,2],
                               mod_top_13$tables[[1]][,2],
                               mod_top_22$tables[[1]][,2],
                               mod_top_32$tables[[1]][,2],
                               mod_top_38$tables[[1]][,2],
                               mod_top_40$tables[[1]][,2]),
                     omit.stat = c("f","ll","rsq","adj.rsq","ser"),
                     intercept.bottom = FALSE)
# The placeholder model is no longer needed once the table is printed.
rm(m1)

# Return the most frequent value of `v`. If several values are tied, the one
# that occurs first in `v` is returned.
getmode <- function(v) {
  distinct_vals <- unique(v)
  counts <- tabulate(match(v, distinct_vals))
  distinct_vals[which.max(counts)]
}
# Medians of the continuous covariates in the a1 sample, rounded to two digits.
round(median(data.sum_a1$gov_lr_cmp_static_scaled), 2)
round(median(data.sum_a1$eu_receipts_gdp_scaled), 2)
round(median(data.sum_a1$unemployment_scaled), 2)
round(median(data.sum_a1$inflation_scaled), 2)

# Most frequent category of each categorical covariate in the a1 sample.
getmode(data.sum_a1$part_of_speech)
getmode(data.sum_a1$budget_any)
getmode(data.sum_a1$unanimity_any)
getmode(data.sum_a1$north_south)
getmode(data.sum_a1$Negotiation_Stage)
getmode(data.sum_a1$Council_Config_final)

# Extract, without drawing, the estimated prevalence of topic 38 over the range
# of image_lag6m_scaled.
plot.dat.38 <- plot(
  prep_council_a1,
  covariate = "image_lag6m_scaled",
  topics = 38,
  method = "continuous",
  omit.plot = TRUE
)
# Outer quantiles of the covariate, shown for reference.
quantile(data.sum_a1$image_lag6m_scaled, probs = c(0.05, 0.1, 0.9, 0.95))

# Line plot of the point estimate with dashed confidence bounds, restricted to
# covariate values strictly between -1.79 and 1.73.
plot.dat.38.p <- data.frame(
  x = plot.dat.38$x,
  means = plot.dat.38$means[[1]],
  ci.low = plot.dat.38$ci[[1]][1, ],
  ci.high = plot.dat.38$ci[[1]][2, ]
) %>%
  filter(x > -1.79, x < 1.73) %>%
  ggplot(aes(x = x, y = means)) +
  geom_line() +
  geom_line(aes(x = x, y = ci.low), linetype = "dashed") +
  geom_line(aes(x = x, y = ci.high), linetype = "dashed") +
  labs(
    x = "Public Image of the EU (z-score)",
    y = "Estimated Topic Proportion",
    title = "Topic 38"
  ) +
  theme_bw()

# This is Figure K1
ggsave(
  filename = "figures_appendix/figure_k_1.eps",
  plot = plot.dat.38.p,
  width = 3, height = 3, units = "in"
)

#######################################-
#### Alternative 2: Controlling for Distance Model ####
#######################################-

# Copy of the main dfm restricted to the a2 variables (the a1 set plus
# final_two_months); documents with any missing value among them are dropped.
data_dfm_a2 <- data_dfm
docvars(data_dfm_a2) <- select(
  data.frame(docvars(data_dfm_a2)),
  gov_eu_supporter, image_lag6m_scaled, date_correct, Transcription,
  final_two_months,
  part_of_speech, text_copy, text_original, Actor,
  gov_lr_cmp_static_scaled, eu_receipts_gdp_scaled, budget_any, unanimity_any,
  unemployment_scaled, inflation_scaled, north_south,
  Negotiation_Stage, Council_Config_final
)
data_dfm_a2 <- dfm_subset(
  data_dfm_a2,
  complete.cases(docvars(data_dfm_a2))
)


# Tabulate how many documents fall into each category of final_two_months.
# The pipe previously ended in a dangling `%>%`, which fed the grouped data
# frame into the `if` statement below as its condition and aborted the script.
docvars(data_dfm_a2) %>% 
  data.frame() %>% 
  dplyr::count(final_two_months)

# Fit the 40-topic STM that adds final_two_months as a control to the
# gov_eu_supporter x image_lag6m_scaled prevalence model and cache it on disk,
# or load the cached fit.
if(run_a2_control_distance_stm == "yes"){
  set.seed(1711)
  stm_out_a2 <- stm(documents = data_dfm_a2,K=40,
                    prevalence=~ gov_eu_supporter*image_lag6m_scaled+
                      part_of_speech+final_two_months+
                      gov_lr_cmp_static_scaled+eu_receipts_gdp_scaled+budget_any+unanimity_any+
                      unemployment_scaled+inflation_scaled+north_south+
                      Negotiation_Stage+Council_Config_final,
                    data = docvars(data_dfm_a2),
                    init.type = "Spectral")
  save(file="generated_data/stm_out_a2.RData",stm_out_a2)
}else{
  load(file="generated_data/stm_out_a2.RData")
}

#######################################-
#### ...FREX Similarity ####
#######################################-
# Build one space-separated string of FREX words per topic of the main model.
# NOTE(review): columns 3:22 of the CSV are assumed to hold the 20 FREX words
# and column X the topic number - confirm against the script that wrote it.
frex_main <- read.csv("generated_data/frex_words_40_main_model.csv") %>% mutate(complete = NA)
for(i in seq_len(nrow(frex_main))){
  frex_main$complete[i] <- paste(frex_main[i,3:22],collapse=" ")
}

# Same string for the 20 top FREX words of each topic of the a2 model.
frex_a2 <- stm::labelTopics(stm_out_a2,n = 20)
frex_a2 <- frex_a2[["frex"]] %>% data.frame() %>% mutate(X=1:40,complete = NA)
for(i in seq_len(nrow(frex_a2))){
  frex_a2$complete[i] <- paste(frex_a2[i,1:20],collapse=" ")
}

# Stack both sets: rows 1-40 are the main model, rows 41-80 the a2 model.
all_words_a2 <- bind_rows(frex_main %>% 
                         select(X,complete) %>% 
                         mutate(model="main"),
                       frex_a2 %>% 
                         select(X,complete) %>% 
                         mutate(model="distance_control"))

# Cosine similarity between all pairs of FREX word strings.
dfmat_a2 <- corpus(all_words_a2,text_field = "complete") %>% tokens() %>% dfm()
tstat2_a2 <- textstat_simil(dfmat_a2, method = "cosine", 
                         margin = "documents")

# Long format (reshape::melt names the index columns X1 and X2). Main-model
# topics (texts 1-40) are kept as the base; comparison texts 41-80 are mapped
# back to a2 topic numbers 1-40, and all other comparisons become NA.
tstat2_pw_a2 <- as.matrix(tstat2_a2) %>% 
  reshape::melt() %>% 
  filter(!is.na(value)) %>% 
  filter(X1!=X2) %>% 
  mutate(basetext = as.numeric(str_remove(X1,"text")),
         comptext = as.numeric(str_remove(X2,"text"))) %>% 
  mutate(robustness_test = case_when(
    comptext %in% 41:80 ~ "distance_control")) %>% 
  mutate(comptext = case_when(
    robustness_test =="distance_control" ~  comptext -40)) %>% 
  filter(basetext%in%1:40) %>% 
  arrange(robustness_test,comptext)

# Most similar comparison text for each of the six topics of interest.
tstat2_pw_a2 %>% 
  filter(basetext %in% c("12","13","22",
                         "32","38","40")) %>% 
  group_by(basetext) %>% 
  filter(value == max(value))

# Test: Is there a new citizen-related topic?
all_words_a2 %>% 
  filter(model=="distance_control") %>% 
  filter(grepl("vote|citizen|public",complete))

#### ...estimateEffect ####

# Estimate the covariate effects on the prevalence of all 40 topics with the
# same right-hand side as the a2 STM prevalence formula and n_simulation
# simulations, then cache the result; otherwise load the cache.
# NOTE(review): estimateEffect_cluster() is not defined in this file
# (presumably in 99_functions.R); check its clustering behaviour there.
if(run_a2_control_distance_estimateEffect=="yes"){
  prep_council_a2 <- estimateEffect_cluster(1:40 ~ gov_eu_supporter*image_lag6m_scaled+
                                              part_of_speech+final_two_months+
                                              gov_lr_cmp_static_scaled+eu_receipts_gdp_scaled+budget_any+unanimity_any+
                                              unemployment_scaled+inflation_scaled+north_south+
                                              Negotiation_Stage+Council_Config_final,
                                            stm_out_a2,nsims = n_simulation,
                                            meta = docvars(data_dfm_a2), 
                                            uncertainty = "Global")
  save(file="generated_data/prep_council_a2.RData",prep_council_a2)
}else{
  load(file="generated_data/prep_council_a2.RData")
}

#### ...regressions ####

# Build the two a2 result tables (or load them from disk):
#   all_reg_months_a2 - coefficient and 95% interval of the final_two_months
#                       term for each of the 40 topics;
#   results_a2        - effect of image_lag6m_scaled by government type for the
#                       six topics of interest.
if(run_a2_control_distance_regressions=="yes"){
  mod_top_all_a2 <- summary.estimateEffect_comb(prep_council_a2,topics = 1:40)[[1]]
  
  all_reg_a2 <- mod_top_all_a2$tables
  all_reg_months_a2 <- data.frame(topic = 1:40,
                               est = NA,
                               se = NA)
  
  # Name of the coefficient row to pull out of every topic's regression table.
  months_term <- "final_two_monthsMore Than Two Months Before Election"
  for (i in seq_len(nrow(all_reg_months_a2))){
    mod_save <- all_reg_a2[[i]] %>% 
      as.data.frame() %>% 
      mutate(term=row.names(.)) %>% 
      filter(term == .env$months_term)
    all_reg_months_a2$est[i] <- mod_save %>% select(Estimate) %>% unlist()
    all_reg_months_a2$se[i] <- mod_save %>% select(`Std. Error`) %>% unlist()
    rm(mod_save)
  }
  rm(months_term)
  
  all_reg_months_a2$lower <- all_reg_months_a2$est - 1.96*all_reg_months_a2$se
  all_reg_months_a2$upper <- all_reg_months_a2$est + 1.96*all_reg_months_a2$se
  
  # NOTE(review): data.sum is not used again in the visible code; presumably
  # calc_interaction_regression_stm() reads it from the global environment -
  # confirm in 99_functions.R before removing.
  data.sum <- docvars(data_dfm_a2)
  
  #### Regression Topic 12 ###
  tab_12_a2 <- calc_interaction_regression_stm(var_1 = "image_lag6m_scaled",
                                        var_mod = "gov_eu_supporter",
                                        topic = 12,
                                        estimate_object = prep_council_a2)
  
  #### Regression Topic 13 ###
  tab_13_a2 <- calc_interaction_regression_stm(var_1 = "image_lag6m_scaled",
                                        var_mod = "gov_eu_supporter",
                                        topic = 13,
                                        estimate_object = prep_council_a2)
  
  #### Regression Topic 22 ###
  tab_22_a2 <- calc_interaction_regression_stm(var_1 = "image_lag6m_scaled",
                                        var_mod = "gov_eu_supporter",
                                        topic = 22,
                                        estimate_object = prep_council_a2)
  
  #### Regression Topic 32 ###
  tab_32_a2 <- calc_interaction_regression_stm(var_1 = "image_lag6m_scaled",
                                        var_mod = "gov_eu_supporter",
                                        topic = 32,
                                        estimate_object = prep_council_a2)
  
  #### Regression Topic 38 ###
  tab_38_a2 <- calc_interaction_regression_stm(var_1 = "image_lag6m_scaled",
                                        var_mod = "gov_eu_supporter",
                                        topic = 38,
                                        estimate_object = prep_council_a2)
  
  #### Regression Topic 40 ###
  tab_40_a2 <- calc_interaction_regression_stm(var_1 = "image_lag6m_scaled",
                                        var_mod = "gov_eu_supporter",
                                        topic = 40,
                                        estimate_object = prep_council_a2)
  
  # The topic labels are attached by position, two per topic, so this relies
  # on every table above contributing exactly two rows in the order bound here.
  results_a2 <- bind_rows(tab_12_a2,tab_13_a2,tab_22_a2,
                          tab_32_a2,tab_38_a2,tab_40_a2) %>% 
    mutate(Topic_name = c(rep("Delaying agreement",2),
                          rep("Formulating a demand",2),
                          rep("Supporting the compromise",2),
                          rep("More technical-level discussion needed",2),
                          rep("Cautious language",2),
                          rep("Raising a concern",2))) %>% 
    mutate(upper = Estimate + 1.96*Std.Error,
           lower = Estimate - 1.96*Std.Error)
  save(file="generated_data/regression_results_a2.RData",all_reg_months_a2,results_a2)
  
}else{
  print("Loading saved regression interaction")
  load(file="generated_data/regression_results_a2.RData")
}

# Replace the raw moderator codes with readable government-type labels.
results_a2$moderator_level <- results_a2$moderator_level %>% 
  recode("baseline" = "Pro-EU Government",
         "gov_eu_supporterEurosceptic Government:image_lag6m_scaled" = "Eurosceptic Government")

# This is Table K2
# The interval bounds are dropped and the remaining columns get presentation
# names. rename() is called with its dplyr:: prefix, presumably because
# library(reshape) attaches its own rename().
# The print argument is spelled out in full (include.rownames) instead of
# relying on partial argument matching.
xtable::xtable(results_a2 %>% 
                 select(-upper,-lower) %>% 
                 dplyr::rename('Government Ideology' = "moderator_level",
                               'Topic Label' = "Topic_name",
                               't-value' = "tval",
                               'p-value' = "p"),digits=5) %>% 
  print(include.rownames=FALSE)

# Attach a display label to each of the 40 topic numbers. The "\n" inside some
# labels is a deliberate line break for the plot axis.
all_reg_months_a2$topic_name<- all_reg_months_a2$topic %>% 
  recode("1"  = "Topic 1: Thanking I (rather specific person)",
         "2"  = "Topic 2: Exchange of information",
         "3"  = "Topic 3: Internal market",
         "4"  = "Topic 4: Ships",
         "5"  = "Topic 5: Reflection",
         "6"  = "Topic 6: Thanking II (rather abstract groups)",
         "7"  = "Topic 7: European judicial matters / \nEuropean public prosecutor",
         "8"  = "Topic 8: Referring to Commissioner",
         "9"  = "Topic 9: Banking supervision",
         "10" = "Topic 10: Burdens of implementation \n(esp. environment)",
         "11" = "Topic 11: Renewable energy and climate",
         "12" = "Topic 12: Delaying agreement",
         "13" = "Topic 13: Formulating a demand",
         "14" = "Topic 14: Invasive species",
         "15" = "Topic 15: GMOs",
         "16" = "Topic 16: Crime",
         "17" = "Topic 17: Budget",
         "18" = "Topic 18: Talking about legal text",
         "19" = "Topic 19: International money laundering",
         "20" = "Topic 20: Governance of four freedoms \n(e.g. capital, workers)",
         "21" = "Topic 21: Health and medical devices",
         "22" = "Topic 22: Supporting the compromise",
         "23" = "Topic 23: Congratulating",
         "24" = "Topic 24: Legal harmonization",
         "25" = "Topic 25: Cooperation between member states",
         "26" = "Topic 26: Air emissions and pollutants",
         "27" = "Topic 27: Tax evasion and fraud",
         "28" = "Topic 28: Banking union (esp. \nresolution and deposit insurance)",
         "29" = "Topic 29: Data protection",
         "30" = "Topic 30: Public procurement and tobacco",
         "31" = "Topic 31: Financial crisis",
         "32" = "Topic 32: More technical-level discussion needed",
         "33" = "Topic 33: Research and development \n(e.g. Horizon 2020)",
         "34" = "Topic 34: Audit and control",
         "35" = "Topic 35: Talking about reaching compromise",
         "36" = "Topic 36: Negotiations with EP",
         "37" = "Topic 37: Brief intervention",
         "38" = "Topic 38: Cautious language",
         "39" = "Topic 39: Affirmation",
         "40" = "Topic 40: Raising a concern"
  )

# Coefficient plot of the final_two_months term for all 40 topics, using the
# lower/upper bounds stored in all_reg_months_a2. reorder(topic_name, -topic)
# puts topic 1 at the top of the axis.
# NOTE(review): the plotted coefficient is the term
# "final_two_monthsMore Than Two Months Before Election"; confirm that the
# x-axis label describes the direction of that contrast as intended.
months_direct_plot_a2 <- all_reg_months_a2 %>% 
  ggplot(aes(x=est,xmin=lower,xmax=upper,y=reorder(topic_name,-topic))) +
  geom_point()+
  geom_errorbarh(height=0)+ 
  theme_bw()+
  geom_vline(xintercept=0,lty="dashed")+
  scale_color_manual(values = c("black","black"))+
  xlab("Effect of debate being two months before election on expected topic proportion") + 
  ylab("") +
  ggtitle("") + 
  # Shared theme object; identical to the inline theme() call it replaces.
  theme_interaction

# This is Figure K3
ggsave(plot = months_direct_plot_a2,
       "figures_appendix/figure_k_3.eps", width = 6, height = 7.5, units = "in")

# Draw one panel of Figure K2: the effect of public image on the six topics of
# interest for a single government type, faceted by the hypothesis each topic
# belongs to.
#   results    - data frame with the columns of results_a2 (Topic_name,
#                moderator_level, Estimate, Std.Error, lower, upper).
#   government - value of moderator_level to keep, e.g. "Pro-EU Government".
# Returns a ggplot object.
plot_image_effect_a2 <- function(results, government) {
  results %>% 
    dplyr::rename("term" = "Topic_name",
                  "model" = "moderator_level",
                  "estimate" = "Estimate",
                  "std.error" = "Std.Error") %>% 
    mutate(feature = case_when(
      term %in% c("Supporting the compromise") ~ "Hypothesis 2",
      term %in% c("Delaying agreement","More technical-level discussion needed","Cautious language") ~ "Hypothesis 3",
      term %in% c("Formulating a demand","Raising a concern") ~ "Hypothesis 4"
    )) %>% 
    mutate(feature = as.factor(feature)) %>% 
    # .env$ makes sure the function argument is used even if `results` happens
    # to contain a column called `government`.
    filter(model == .env$government) %>% 
    ggplot(aes(x=estimate,xmin=lower,xmax=upper,y=term,color=model)) +
    geom_point()+
    geom_errorbarh(height=0)+ 
    facet_grid(feature~model,scales = "free_y",space = "free")+
    theme_bw()+
    geom_vline(xintercept=0,lty="dashed")+
    scale_color_manual(values = c("black","black"))+
    xlab("Effect of public image on expected topic proportion") + 
    ylab("") +
    ggtitle("") + 
    theme_interaction
}

europhiles_a2 <- plot_image_effect_a2(results_a2, "Pro-EU Government")

eurosceptics_a2 <- plot_image_effect_a2(results_a2, "Eurosceptic Government")

# This is Figure K2
ggsave(plot = ggpubr::ggarrange(eurosceptics_a2,europhiles_a2,ncol=1,nrow=2),
       "figures_appendix/figure_k_2.eps", width = 6, height = 6, units = "in")

#######################################-
#### Alternative 3: Distance Interaction Model ####
#######################################-
# Copy of the main dfm restricted to the a3 variables (including
# final_two_months); documents with any missing value among them are dropped.
data_dfm_a3 <- data_dfm
docvars(data_dfm_a3) <- select(
  data.frame(docvars(data_dfm_a3)),
  gov_eu_supporter, image_lag6m_scaled, date_correct, Transcription,
  final_two_months,
  part_of_speech, text_copy, text_original, Actor,
  gov_lr_cmp_static_scaled, eu_receipts_gdp_scaled, budget_any, unanimity_any,
  unemployment_scaled, inflation_scaled, north_south,
  Negotiation_Stage, Council_Config_final
)
data_dfm_a3 <- dfm_subset(
  data_dfm_a3,
  complete.cases(docvars(data_dfm_a3))
)

# Fit the 40-topic STM whose prevalence model interacts final_two_months with
# image_lag6m_scaled and cache it on disk, or load the cached fit.
if (run_a3_distance_interaction_stm == "yes") {
  set.seed(1711)
  stm_out_a3 <- stm(
    documents = data_dfm_a3,
    K = 40,
    prevalence = ~ final_two_months * image_lag6m_scaled +
      part_of_speech + gov_eu_supporter +
      gov_lr_cmp_static_scaled + eu_receipts_gdp_scaled +
      budget_any + unanimity_any +
      unemployment_scaled + inflation_scaled + north_south +
      Negotiation_Stage + Council_Config_final,
    data = docvars(data_dfm_a3),
    init.type = "Spectral"
  )
  save(stm_out_a3, file = "generated_data/stm_out_a3.RData")
} else {
  load(file = "generated_data/stm_out_a3.RData")
}

#######################################-
#### ...FREX Similarity ####
#######################################-
# Build one space-separated string of FREX words per topic of the main model.
# NOTE(review): columns 3:22 of the CSV are assumed to hold the 20 FREX words
# and column X the topic number - confirm against the script that wrote it.
frex_main <- read.csv("generated_data/frex_words_40_main_model.csv") %>% mutate(complete = NA)
for(i in seq_len(nrow(frex_main))){
  frex_main$complete[i] <- paste(frex_main[i,3:22],collapse=" ")
}

# Same string for the 20 top FREX words of each topic of the a3 model.
frex_a3 <- stm::labelTopics(stm_out_a3,n = 20)
frex_a3 <- frex_a3[["frex"]] %>% data.frame() %>% mutate(X=1:40,complete = NA)
for(i in seq_len(nrow(frex_a3))){
  frex_a3$complete[i] <- paste(frex_a3[i,1:20],collapse=" ")
}

# Stack both sets: rows 1-40 are the main model, rows 41-80 the a3 model.
all_words_a3 <- bind_rows(frex_main %>% 
                         select(X,complete) %>% 
                         mutate(model="main"),
                       frex_a3 %>% 
                         select(X,complete) %>% 
                         mutate(model="distance_interaction"))

# Cosine similarity between all pairs of FREX word strings.
dfmat_a3 <- corpus(all_words_a3,text_field = "complete") %>% tokens() %>% dfm()
tstat2_a3 <- textstat_simil(dfmat_a3, method = "cosine", 
                         margin = "documents")

# Long format (reshape::melt names the index columns X1 and X2). Main-model
# topics (texts 1-40) are kept as the base; comparison texts 41-80 are mapped
# back to a3 topic numbers 1-40, and all other comparisons become NA.
tstat2_pw_a3 <- as.matrix(tstat2_a3) %>% 
  reshape::melt() %>% 
  filter(!is.na(value)) %>% 
  filter(X1!=X2) %>% 
  mutate(basetext = as.numeric(str_remove(X1,"text")),
         comptext = as.numeric(str_remove(X2,"text"))) %>% 
  mutate(robustness_test = case_when(
    comptext %in% 41:80 ~ "distance_interaction")) %>% 
  mutate(comptext = case_when(
    robustness_test =="distance_interaction" ~  comptext -40)) %>% 
  filter(basetext%in%1:40) %>% 
  arrange(robustness_test,comptext)

# Most similar comparison text for each of the six topics of interest.
tstat2_pw_a3 %>% 
  filter(basetext %in% c("12","13","22",
                         "32","38","40")) %>% 
  group_by(basetext) %>% 
  filter(value == max(value))

# Test: Is there a new citizen-related topic?
all_words_a3 %>% 
  filter(model=="distance_interaction") %>% 
  filter(grepl("vote|citizen|public",complete))

#### ...estimateEffect ####
# Estimate the covariate effects on the prevalence of topics 12, 13, 22, 32, 38
# and 40 with the same right-hand side as the a3 STM prevalence formula and
# n_simulation simulations, then cache the result; otherwise load the cache.
# NOTE(review): estimateEffect_cluster() is not defined in this file
# (presumably in 99_functions.R); check its clustering behaviour there.
if(run_a3_distance_interaction_estimateEffect=="yes"){
  prep_council_a3 <- estimateEffect_cluster(c(12,13,22,32,38,40) ~ final_two_months*image_lag6m_scaled+
                                              part_of_speech+gov_eu_supporter+
                                              gov_lr_cmp_static_scaled+eu_receipts_gdp_scaled+budget_any+unanimity_any+
                                              unemployment_scaled+inflation_scaled+north_south+
                                              Negotiation_Stage+Council_Config_final,
                                            stm_out_a3,
                                            nsims = n_simulation,
                                            meta = docvars(data_dfm_a3),
                                            uncertainty = "Global")
  save(file="generated_data/prep_council_a3.RData",prep_council_a3)
}else{
  load(file="generated_data/prep_council_a3.RData")
}

#### ...regressions ####

# Summarise the a3 effect estimates one topic at a time, keeping the first
# element of each summary, and cache them; otherwise load the cached summaries.
# The mod_top_* objects overwrite any objects of the same name created earlier.
if (run_a3_distance_interaction_regressions == "yes") {
  mod_top_12 <- summary.estimateEffect_comb(prep_council_a3, topics = 12)[[1]]
  mod_top_13 <- summary.estimateEffect_comb(prep_council_a3, topics = 13)[[1]]
  mod_top_22 <- summary.estimateEffect_comb(prep_council_a3, topics = 22)[[1]]
  mod_top_32 <- summary.estimateEffect_comb(prep_council_a3, topics = 32)[[1]]
  mod_top_38 <- summary.estimateEffect_comb(prep_council_a3, topics = 38)[[1]]
  mod_top_40 <- summary.estimateEffect_comb(prep_council_a3, topics = 40)[[1]]
  save(
    mod_top_12, mod_top_13, mod_top_22,
    mod_top_32, mod_top_38, mod_top_40,
    file = "generated_data/regression_results_a3.RData"
  )
} else {
  print("Loading saved regression interaction")
  load(file = "generated_data/regression_results_a3.RData")
}
# Placeholder model for stargazer: an lm() with the same right-hand side as the
# a3 effect model is fitted to a random outcome (fake_theta). It only supplies
# a model object with the right terms; the coefficients and standard errors
# shown in the table are overridden via `coef` and `se` below.
data.sum_a3 <- docvars(data_dfm_a3)

data.sum_a3$fake_theta <- runif(n = nrow(data.sum_a3),min = 0,max = 1)
m1 <- lm(fake_theta ~ final_two_months*image_lag6m_scaled+
                  part_of_speech+gov_eu_supporter+
                  gov_lr_cmp_static_scaled+eu_receipts_gdp_scaled+budget_any+unanimity_any+
                  unemployment_scaled+inflation_scaled+north_south+
                  Negotiation_Stage+Council_Config_final,
                data = data.sum_a3)

# This is Table K3
# One column per topic. Column 1 of each topic's first summary table is passed
# as the coefficients and column 2 as the standard errors. covariate.labels are
# matched by position, so they must follow the term order of the formula above
# (intercept first, interaction term last).
# NOTE(review): the a2 regression code in this script extracts a term named
# "final_two_monthsMore Than Two Months Before Election"; confirm that the
# "Last two months before election" labels describe the direction of that
# contrast as intended.
stargazer::stargazer(m1,m1,m1,
                     m1,m1,m1,
                   #  type = "text",
                     dep.var.labels   = "Topic Prevalence",
                     column.labels = c("Topic 12","Topic 13","Topic 22",
                                       "Topic 32","Topic 38","Topic 40"),
                     covariate.labels = c("Intercept","Last two months before election","Public Image of the EU", "Middle Part of Speech",
                                          "End Part of Speech", "Eurosceptic Government","Government Left-Right position", 
                                          "Net receipts from EU budget",
                                          "Budget issue","Unanimity Required","Unemployment Rate","Inflation Rate",
                                          "Northern Europe","Southern Europe","Negotiation stage: Initial Presentation",
                                          "Negotiation stage: Mixed Negotiations","Negotiation stage: Policy Debates",
                                          "Council configuration: Ecofin","Council configuration: EPSCO",
                                          "Council configuration: ENV","Council configuration: JHA",
                                          "Last two months before election x Public Image of the EU"),
                     coef = list(mod_top_12$tables[[1]][,1],
                                 mod_top_13$tables[[1]][,1],
                                 mod_top_22$tables[[1]][,1],
                                 mod_top_32$tables[[1]][,1],
                                 mod_top_38$tables[[1]][,1],
                                 mod_top_40$tables[[1]][,1]),
                     se = list(mod_top_12$tables[[1]][,2],
                               mod_top_13$tables[[1]][,2],
                               mod_top_22$tables[[1]][,2],
                               mod_top_32$tables[[1]][,2],
                               mod_top_38$tables[[1]][,2],
                               mod_top_40$tables[[1]][,2]),
                     omit.stat = c("f","ll","rsq","adj.rsq","ser"),
                     intercept.bottom = FALSE)
# The placeholder model is no longer needed once the table is printed.
rm(m1)

#######################################-
#### Alternative 4: Distance to election linear interaction ####
#######################################-
# Copy of the main dfm restricted to the a4 variables, plus the distance to the
# planned election divided by 365 (distance_elect_planned_years); documents
# with any missing value among these variables are dropped.
data_dfm_a4 <- data_dfm
docvars(data_dfm_a4) <- mutate(
  select(
    data.frame(docvars(data_dfm_a4)),
    gov_eu_supporter, image_lag6m_scaled, date_correct, Transcription,
    distance_elect_planned,
    part_of_speech, text_copy, text_original, Actor,
    gov_lr_cmp_static_scaled, eu_receipts_gdp_scaled, budget_any, unanimity_any,
    unemployment_scaled, inflation_scaled, north_south,
    Negotiation_Stage, Council_Config_final
  ),
  distance_elect_planned_years = distance_elect_planned / 365
)
data_dfm_a4 <- dfm_subset(
  data_dfm_a4,
  complete.cases(docvars(data_dfm_a4))
)

# Fit the 40-topic STM whose prevalence model interacts
# distance_elect_planned_years with image_lag6m_scaled and cache it on disk,
# or load the cached fit.
if (run_a4_distance_linear_stm == "yes") {
  set.seed(1711)
  stm_out_a4 <- stm(
    documents = data_dfm_a4,
    K = 40,
    prevalence = ~ distance_elect_planned_years * image_lag6m_scaled +
      part_of_speech + gov_eu_supporter +
      gov_lr_cmp_static_scaled + eu_receipts_gdp_scaled +
      budget_any + unanimity_any +
      unemployment_scaled + inflation_scaled + north_south +
      Negotiation_Stage + Council_Config_final,
    data = docvars(data_dfm_a4),
    init.type = "Spectral"
  )
  save(stm_out_a4, file = "generated_data/stm_out_a4.RData")
} else {
  load(file = "generated_data/stm_out_a4.RData")
}

#### ...FREX Similarity ####
# Compare the top-20 FREX words of every main-model topic with those of the
# Alternative 4 model, to see which Alternative 4 topic matches each
# main-model topic of interest.

# Main model: collapse the FREX words stored in columns 3 to 22 into one
# string per topic. Pasting column-wise is vectorised and, unlike a
# `1:nrow()` loop, also behaves for a zero-row table.
frex_main <- read.csv("generated_data/frex_words_40_main_model.csv")
frex_main$complete <- do.call(
  paste,
  c(unname(as.list(frex_main[, 3:22])), sep = " ")
)

# Alternative 4 model: same representation, built from labelTopics().
frex_a4 <- stm::labelTopics(stm_out_a4, n = 20)[["frex"]] %>%
  data.frame() %>%
  mutate(X = 1:40)
frex_a4$complete <- do.call(
  paste,
  c(unname(as.list(frex_a4[, 1:20])), sep = " ")
)

# Stack both models: rows 1-40 are the main model, rows 41-80 Alternative 4.
all_words_a4 <- bind_rows(
  frex_main %>%
    select(X, complete) %>%
    mutate(model = "main"),
  frex_a4 %>%
    select(X, complete) %>%
    mutate(model = "distance_linear")
)

# Cosine similarity between all pairs of FREX word lists.
dfmat_a4 <- corpus(all_words_a4, text_field = "complete") %>%
  tokens() %>%
  dfm()
tstat2_a4 <- textstat_simil(dfmat_a4, method = "cosine", margin = "documents")

# Long format: basetext indexes a main-model topic; comptext is the
# Alternative 4 topic number (NA when the compared text is a main-model topic).
tstat2_pw_a4 <- as.matrix(tstat2_a4) %>%
  reshape::melt() %>%
  filter(!is.na(value)) %>%
  filter(X1 != X2) %>%
  mutate(
    basetext = as.numeric(str_remove(X1, "text")),
    comptext = as.numeric(str_remove(X2, "text"))
  ) %>%
  mutate(robustness_test = case_when(
    comptext %in% 41:80 ~ "distance_linear"
  )) %>%
  mutate(comptext = case_when(
    robustness_test == "distance_linear" ~ comptext - 40
  )) %>%
  filter(basetext %in% 1:40) %>%
  arrange(robustness_test, comptext)

# Best-matching Alternative 4 topic for each main topic of interest.
# Main-vs-main pairs (comptext is NA) are excluded so that the maximum is
# always taken over Alternative 4 topics only.
tstat2_pw_a4 %>%
  filter(!is.na(comptext)) %>%
  filter(basetext %in% c(12, 13, 22, 32, 38, 40)) %>%
  group_by(basetext) %>%
  filter(value == max(value))

# Test: Is there a new citizen-related topic?
# The filter must use the label assigned above ("distance_linear"); any other
# label matches no rows and silently hides the check.
all_words_a4 %>%
  filter(model == "distance_linear") %>%
  filter(grepl("vote|citizen|public", complete))

#### ...estimateEffect ####

# Estimate covariate effects on the six topics of interest with
# estimateEffect_cluster() (project helper, not defined in this section), or
# reload the cached result when the setting is not "yes".
if (run_a4_distance_linear_estimateEffect == "yes") {
  prep_council_a4 <- estimateEffect_cluster(
    c(12, 13, 22, 32, 38, 40) ~
      distance_elect_planned_years * image_lag6m_scaled +
      part_of_speech + gov_eu_supporter +
      gov_lr_cmp_static_scaled + eu_receipts_gdp_scaled +
      budget_any + unanimity_any +
      unemployment_scaled + inflation_scaled + north_south +
      Negotiation_Stage + Council_Config_final,
    stm_out_a4,
    nsims = n_simulation,
    meta = docvars(data_dfm_a4),
    uncertainty = "Global"
  )
  save(prep_council_a4, file = "generated_data/prep_council_a4.RData")
} else {
  load(file = "generated_data/prep_council_a4.RData")
}

#### ...regressions ####

# Summarise the estimated effects topic by topic (first element of each
# summary), or reload the cached summaries when the setting is not "yes".
if (run_a4_distance_linear_regressions == "yes") {
  mod_top_12 <- summary.estimateEffect_comb(prep_council_a4, topics = 12)[[1]]
  mod_top_13 <- summary.estimateEffect_comb(prep_council_a4, topics = 13)[[1]]
  mod_top_22 <- summary.estimateEffect_comb(prep_council_a4, topics = 22)[[1]]
  mod_top_32 <- summary.estimateEffect_comb(prep_council_a4, topics = 32)[[1]]
  mod_top_38 <- summary.estimateEffect_comb(prep_council_a4, topics = 38)[[1]]
  mod_top_40 <- summary.estimateEffect_comb(prep_council_a4, topics = 40)[[1]]
  save(
    mod_top_12, mod_top_13, mod_top_22,
    mod_top_32, mod_top_38, mod_top_40,
    file = "generated_data/regression_results_a4.RData"
  )
} else {
  print("Loading saved regression interaction")
  load(file = "generated_data/regression_results_a4.RData")
}

# Placeholder model for stargazer: an lm on a random outcome with the same
# right-hand side as the STM regressions. It only provides the table layout;
# the coefficients and standard errors shown are supplied via `coef`/`se`.
data.sum_a4 <- docvars(data_dfm_a4)
data.sum_a4$fake_theta <- runif(n = nrow(data.sum_a4), min = 0, max = 1)
m1 <- lm(
  fake_theta ~ distance_elect_planned_years * image_lag6m_scaled +
    part_of_speech + gov_eu_supporter +
    gov_lr_cmp_static_scaled + eu_receipts_gdp_scaled +
    budget_any + unanimity_any +
    unemployment_scaled + inflation_scaled + north_south +
    Negotiation_Stage + Council_Config_final,
  data = data.sum_a4
)

# This is Table K4
stargazer::stargazer(
  m1, m1, m1,
  m1, m1, m1,
  # type = "text",
  dep.var.labels = "Topic Prevalence",
  column.labels = c(
    "Topic 12", "Topic 13", "Topic 22",
    "Topic 32", "Topic 38", "Topic 40"
  ),
  covariate.labels = c(
    "Intercept", "Distance to Planned Election", "Public Image of the EU",
    "Middle Part of Speech", "End Part of Speech", "Eurosceptic Government",
    "Government Left-Right position",
    "Net receipts from EU budget",
    "Budget issue", "Unanimity Required", "Unemployment Rate", "Inflation Rate",
    "Northern Europe", "Southern Europe",
    "Negotiation stage: Initial Presentation",
    "Negotiation stage: Mixed Negotiations",
    "Negotiation stage: Policy Debates",
    "Council configuration: Ecofin", "Council configuration: EPSCO",
    "Council configuration: ENV", "Council configuration: JHA",
    "Distance to Planned Election x Public Image of the EU"
  ),
  # Column 1 of each summary table holds the estimates, column 2 the
  # standard errors.
  coef = lapply(
    list(mod_top_12, mod_top_13, mod_top_22, mod_top_32, mod_top_38, mod_top_40),
    function(mod) mod$tables[[1]][, 1]
  ),
  se = lapply(
    list(mod_top_12, mod_top_13, mod_top_22, mod_top_32, mod_top_38, mod_top_40),
    function(mod) mod$tables[[1]][, 2]
  ),
  omit.stat = c("f", "ll", "rsq", "adj.rsq", "ser"),
  intercept.bottom = FALSE
)
rm(m1)

#######################################-
#### Alternative 5: Government Bond Model ####
#######################################-
# Alternative 5 data: start from the main DFM and keep only the document
# variables needed for the government bond specification.
data_dfm_a5 <- data_dfm
docvars(data_dfm_a5) <- docvars(data_dfm_a5) %>%
  data.frame() %>%
  select(
    gov_eu_supporter, image_lag6m_scaled, date_correct, Transcription,
    bond_yields,
    part_of_speech, text_copy, text_original, Actor,
    gov_lr_cmp_static_scaled, eu_receipts_gdp_scaled, budget_any, unanimity_any,
    unemployment_scaled, inflation_scaled, north_south,
    Negotiation_Stage, Council_Config_final
  )

# Keep only documents without missing values in any retained variable.
data_dfm_a5 <- dfm_subset(data_dfm_a5, complete.cases(docvars(data_dfm_a5)))
data.sum_a5 <- docvars(data_dfm_a5)

# Descriptives: the observations with the highest bond yields ...
data.sum_a5 %>%
  arrange(desc(bond_yields)) %>%
  select(Actor, date_correct, bond_yields) %>%
  head()

# ... and the mean bond yield in the estimation sample.
data.sum_a5 %>%
  summarise(mean.bond = mean(bond_yields))

# Fit the 40-topic STM with the bond yield x public image interaction, or
# reload the cached fit when the setting is not "yes".
if (run_a5_bonds_stm == "yes") {
  set.seed(1711)
  stm_out_a5 <- stm(
    documents = data_dfm_a5,
    K = 40,
    prevalence = ~ bond_yields * image_lag6m_scaled +
      part_of_speech + gov_eu_supporter +
      gov_lr_cmp_static_scaled + eu_receipts_gdp_scaled +
      budget_any + unanimity_any +
      unemployment_scaled + inflation_scaled + north_south +
      Negotiation_Stage + Council_Config_final,
    data = docvars(data_dfm_a5),
    init.type = "Spectral"
  )

  save(stm_out_a5, file = "generated_data/stm_out_a5.RData")
} else {
  load(file = "generated_data/stm_out_a5.RData")
}

#### ...FREX Similarity ####
# Compare the top-20 FREX words of every main-model topic with those of the
# Alternative 5 model, to see which Alternative 5 topic matches each
# main-model topic of interest.

# Main model: collapse the FREX words stored in columns 3 to 22 into one
# string per topic. Pasting column-wise is vectorised and, unlike a
# `1:nrow()` loop, also behaves for a zero-row table.
frex_main <- read.csv("generated_data/frex_words_40_main_model.csv")
frex_main$complete <- do.call(
  paste,
  c(unname(as.list(frex_main[, 3:22])), sep = " ")
)

# Alternative 5 model: same representation, built from labelTopics().
frex_a5 <- stm::labelTopics(stm_out_a5, n = 20)[["frex"]] %>%
  data.frame() %>%
  mutate(X = 1:40)
frex_a5$complete <- do.call(
  paste,
  c(unname(as.list(frex_a5[, 1:20])), sep = " ")
)

# Stack both models: rows 1-40 are the main model, rows 41-80 Alternative 5.
all_words_a5 <- bind_rows(
  frex_main %>%
    select(X, complete) %>%
    mutate(model = "main"),
  frex_a5 %>%
    select(X, complete) %>%
    mutate(model = "bonds")
)

# Cosine similarity between all pairs of FREX word lists.
dfmat_a5 <- corpus(all_words_a5, text_field = "complete") %>%
  tokens() %>%
  dfm()
tstat2_a5 <- textstat_simil(dfmat_a5, method = "cosine", margin = "documents")

# Long format: basetext indexes a main-model topic; comptext is the
# Alternative 5 topic number (NA when the compared text is a main-model topic).
tstat2_pw_a5 <- as.matrix(tstat2_a5) %>%
  reshape::melt() %>%
  filter(!is.na(value)) %>%
  filter(X1 != X2) %>%
  mutate(
    basetext = as.numeric(str_remove(X1, "text")),
    comptext = as.numeric(str_remove(X2, "text"))
  ) %>%
  mutate(robustness_test = case_when(
    comptext %in% 41:80 ~ "bonds"
  )) %>%
  mutate(comptext = case_when(
    robustness_test == "bonds" ~ comptext - 40
  )) %>%
  filter(basetext %in% 1:40) %>%
  arrange(robustness_test, comptext)

# Best-matching Alternative 5 topic for each main topic of interest.
# Main-vs-main pairs (comptext is NA) are excluded so that the maximum is
# always taken over Alternative 5 topics only.
tstat2_pw_a5 %>%
  filter(!is.na(comptext)) %>%
  filter(basetext %in% c(12, 13, 22, 32, 38, 40)) %>%
  group_by(basetext) %>%
  filter(value == max(value))

# Test: Is there a new citizen-related topic?
all_words_a5 %>%
  filter(model == "bonds") %>%
  filter(grepl("vote|citizen|public", complete))

#### ...estimateEffect ####

# Topic correspondence between the main model and this model, with the
# similarity value noted by the authors in parentheses:
#   main topic 12 -> topic 12 (0.7)
#   main topic 13 -> topic 13 (0.75)
#   main topic 22 -> topic 23 (0.65)
#   main topic 32 -> topic 32 (0.75)
#   main topic 38 -> topic 39 (0.75)
#   main topic 40 -> topic 40 (0.65)

# Estimate covariate effects on the six matched topics with
# estimateEffect_cluster() (project helper, not defined in this section), or
# reload the cached result when the setting is not "yes".
if (run_a5_bonds_estimateEffect == "yes") {
  prep_council_a5 <- estimateEffect_cluster(
    c(12, 13, 23, 32, 39, 40) ~
      bond_yields * image_lag6m_scaled +
      part_of_speech + gov_eu_supporter +
      gov_lr_cmp_static_scaled + eu_receipts_gdp_scaled +
      budget_any + unanimity_any +
      unemployment_scaled + inflation_scaled + north_south +
      Negotiation_Stage + Council_Config_final,
    stm_out_a5,
    nsims = n_simulation,
    meta = docvars(data_dfm_a5),
    uncertainty = "Global"
  )
  save(prep_council_a5, file = "generated_data/prep_council_a5.RData")
} else {
  load(file = "generated_data/prep_council_a5.RData")
}

#### ...regressions ####

# Summarise the estimated effects topic by topic (first element of each
# summary), or reload the cached summaries when the setting is not "yes".
# Topic numbers follow this model's numbering (23 and 39 instead of 22 and 38).
if (run_a5_bonds_regressions == "yes") {
  mod_top_12 <- summary.estimateEffect_comb(prep_council_a5, topics = 12)[[1]]
  mod_top_13 <- summary.estimateEffect_comb(prep_council_a5, topics = 13)[[1]]
  mod_top_23 <- summary.estimateEffect_comb(prep_council_a5, topics = 23)[[1]]
  mod_top_32 <- summary.estimateEffect_comb(prep_council_a5, topics = 32)[[1]]
  mod_top_39 <- summary.estimateEffect_comb(prep_council_a5, topics = 39)[[1]]
  mod_top_40 <- summary.estimateEffect_comb(prep_council_a5, topics = 40)[[1]]

  save(
    mod_top_12, mod_top_13, mod_top_23,
    mod_top_32, mod_top_39, mod_top_40,
    file = "generated_data/regression_results_a5.RData"
  )
} else {
  print("Loading saved regression interaction")
  load(file = "generated_data/regression_results_a5.RData")
}

# Placeholder model for stargazer: an lm on a random outcome with the same
# right-hand side as the STM regressions. It only provides the table layout;
# the coefficients and standard errors shown are supplied via `coef`/`se`.
data.sum_a5$fake_theta <- runif(n = nrow(data.sum_a5), min = 0, max = 1)
m1 <- lm(
  fake_theta ~ bond_yields * image_lag6m_scaled +
    part_of_speech + gov_eu_supporter +
    gov_lr_cmp_static_scaled + eu_receipts_gdp_scaled +
    budget_any + unanimity_any +
    unemployment_scaled + inflation_scaled + north_south +
    Negotiation_Stage + Council_Config_final,
  data = data.sum_a5
)

# This is Table K5
stargazer::stargazer(
  m1, m1, m1,
  m1, m1, m1,
  #  type = "text",
  dep.var.labels = "Topic Prevalence",
  column.labels = c(
    "Topic 12", "Topic 13", "Topic 23",
    "Topic 32", "Topic 39", "Topic 40"
  ),
  covariate.labels = c(
    "Intercept", "Bond Yields", "Public Image of the EU",
    "Middle Part of Speech", "End Part of Speech", "Eurosceptic Government",
    "Government Left-Right position",
    "Net receipts from EU budget",
    "Budget issue", "Unanimity Required", "Unemployment Rate", "Inflation Rate",
    "Northern Europe", "Southern Europe",
    "Negotiation stage: Initial Presentation",
    "Negotiation stage: Mixed Negotiations",
    "Negotiation stage: Policy Debates",
    "Council configuration: Ecofin", "Council configuration: EPSCO",
    "Council configuration: ENV", "Council configuration: JHA",
    "Bond Yields x Public Image of the EU"
  ),
  # Column 1 of each summary table holds the estimates, column 2 the
  # standard errors.
  coef = lapply(
    list(mod_top_12, mod_top_13, mod_top_23, mod_top_32, mod_top_39, mod_top_40),
    function(mod) mod$tables[[1]][, 1]
  ),
  se = lapply(
    list(mod_top_12, mod_top_13, mod_top_23, mod_top_32, mod_top_39, mod_top_40),
    function(mod) mod$tables[[1]][, 2]
  ),
  omit.stat = c("f", "ll", "rsq", "adj.rsq", "ser"),
  intercept.bottom = FALSE
)
rm(m1)

############################################-
#### Alternative 6: Negotiation Stage Interaction Model ####
############################################-

# Alternative 6 data: start from the main DFM, keep only the document
# variables needed, and collapse the negotiation stage into two categories.
data_dfm_a6 <- data_dfm
docvars(data_dfm_a6) <- docvars(data_dfm_a6) %>%
  data.frame() %>%
  select(
    gov_eu_supporter, image_lag6m_scaled, date_correct, Transcription,
    part_of_speech, text_copy, text_original, Actor,
    gov_lr_cmp_static_scaled, eu_receipts_gdp_scaled, budget_any, unanimity_any,
    unemployment_scaled, inflation_scaled, north_south,
    Negotiation_Stage, Council_Config_final
  ) %>%
  # Every stage other than "Debates on Political Agreement" becomes
  # "Other Debates".
  mutate(
    negotiation_stage_two = case_when(
      Negotiation_Stage == "Debates on Political Agreement" ~
        "Debates on Political Agreement",
      TRUE ~ "Other Debates"
    )
  )

# Keep only documents without missing values in any retained variable.
data_dfm_a6 <- dfm_subset(data_dfm_a6, complete.cases(docvars(data_dfm_a6)))

# Fit the 40-topic STM with the two-category stage x public image interaction
# (the original Negotiation_Stage control is not part of this formula), or
# reload the cached fit when the setting is not "yes".
if (run_a6_negotiation_stm == "yes") {
  set.seed(1711)
  stm_out_a6 <- stm(
    documents = data_dfm_a6,
    K = 40,
    prevalence = ~ negotiation_stage_two * image_lag6m_scaled +
      part_of_speech + gov_eu_supporter +
      gov_lr_cmp_static_scaled + eu_receipts_gdp_scaled +
      budget_any + unanimity_any +
      unemployment_scaled + inflation_scaled + north_south +
      Council_Config_final,
    data = docvars(data_dfm_a6),
    init.type = "Spectral"
  )

  save(stm_out_a6, file = "generated_data/stm_out_a6.RData")
} else {
  load(file = "generated_data/stm_out_a6.RData")
}

#### ...estimateEffect ####

# Estimate covariate effects on the six topics of interest with
# estimateEffect_cluster() (project helper, not defined in this section), or
# reload the cached result when the setting is not "yes".
if (run_a6_negotiation_estimateEffect == "yes") {
  prep_council_a6 <- estimateEffect_cluster(
    c(12, 13, 22, 32, 38, 40) ~
      negotiation_stage_two * image_lag6m_scaled +
      part_of_speech + gov_eu_supporter +
      gov_lr_cmp_static_scaled + eu_receipts_gdp_scaled +
      budget_any + unanimity_any +
      unemployment_scaled + inflation_scaled + north_south +
      Council_Config_final,
    stm_out_a6,
    nsims = n_simulation,
    meta = docvars(data_dfm_a6),
    uncertainty = "Global"
  )

  save(prep_council_a6, file = "generated_data/prep_council_a6.RData")
} else {
  load(file = "generated_data/prep_council_a6.RData")
}

#### ...regressions ####

# Build the per-topic regression summaries and the table of public-image
# effects by negotiation stage, or reload both from the cache when the setting
# is not "yes".
if (run_a6_negotiation_regressions == "yes") {
  # Per-topic coefficient summaries (used for Table K6).
  mod_top_12 <- summary.estimateEffect_comb(prep_council_a6, topics = 12)[[1]]
  mod_top_13 <- summary.estimateEffect_comb(prep_council_a6, topics = 13)[[1]]
  mod_top_22 <- summary.estimateEffect_comb(prep_council_a6, topics = 22)[[1]]
  mod_top_32 <- summary.estimateEffect_comb(prep_council_a6, topics = 32)[[1]]
  mod_top_38 <- summary.estimateEffect_comb(prep_council_a6, topics = 38)[[1]]
  mod_top_40 <- summary.estimateEffect_comb(prep_council_a6, topics = 40)[[1]]

  # NOTE(review): `data.sum` is not referenced again in this block; presumably
  # calc_interaction_regression_stm() reads it from the global environment -
  # confirm in 99_functions.R before removing.
  data.sum <- docvars(data_dfm_a6)

  # Public-image effect by negotiation stage, one table per topic.
  #### Regression Topic 12 ###
  tab_12_a6 <- calc_interaction_regression_stm(
    var_1 = "image_lag6m_scaled",
    var_mod = "negotiation_stage_two",
    topic = 12,
    estimate_object = prep_council_a6
  )

  #### Regression Topic 13 ###
  tab_13_a6 <- calc_interaction_regression_stm(
    var_1 = "image_lag6m_scaled",
    var_mod = "negotiation_stage_two",
    topic = 13,
    estimate_object = prep_council_a6
  )

  #### Regression Topic 22 ###
  tab_22_a6 <- calc_interaction_regression_stm(
    var_1 = "image_lag6m_scaled",
    var_mod = "negotiation_stage_two",
    topic = 22,
    estimate_object = prep_council_a6
  )

  #### Regression Topic 32 ###
  tab_32_a6 <- calc_interaction_regression_stm(
    var_1 = "image_lag6m_scaled",
    var_mod = "negotiation_stage_two",
    topic = 32,
    estimate_object = prep_council_a6
  )

  #### Regression Topic 38 ###
  tab_38_a6 <- calc_interaction_regression_stm(
    var_1 = "image_lag6m_scaled",
    var_mod = "negotiation_stage_two",
    topic = 38,
    estimate_object = prep_council_a6
  )

  #### Regression Topic 40 ###
  tab_40_a6 <- calc_interaction_regression_stm(
    var_1 = "image_lag6m_scaled",
    var_mod = "negotiation_stage_two",
    topic = 40,
    estimate_object = prep_council_a6
  )

  # Stack the tables in topic order. Each topic label is repeated twice, so
  # every table is expected to contribute two rows (one per moderator level).
  results_all_a6 <- bind_rows(
    tab_12_a6, tab_13_a6, tab_22_a6,
    tab_32_a6, tab_38_a6, tab_40_a6
  ) %>%
    mutate(
      Topic_name = rep(
        c(
          "Delaying agreement",
          "Formulating a demand",
          "Supporting the compromise",
          "More technical-level discussion needed",
          "Cautious language",
          "Raising a concern"
        ),
        each = 2
      ),
      # Bounds at +/- 1.96 standard errors.
      upper = Estimate + 1.96 * Std.Error,
      lower = Estimate - 1.96 * Std.Error,
      # Readable names for the two moderator levels.
      moderator_level = case_when(
        moderator_level ==
          "negotiation_stage_twoOther Debates:image_lag6m_scaled" ~
          "Other Debates",
        moderator_level == "baseline" ~ "Debates on Political Agreement"
      )
    )
  save(
    results_all_a6,
    mod_top_12, mod_top_13, mod_top_22,
    mod_top_32, mod_top_38, mod_top_40,
    file = "generated_data/regression_results_a6.RData"
  )
} else {
  print("Loading saved regression interaction")
  load(file = "generated_data/regression_results_a6.RData")
}

# Placeholder model for stargazer: an lm on a random outcome with the same
# right-hand side as the STM regressions. It only provides the table layout;
# the coefficients and standard errors shown are supplied via `coef`/`se`.
data.sum_a6 <- docvars(data_dfm_a6)
data.sum_a6$fake_theta <- runif(n = nrow(data.sum_a6), min = 0, max = 1)
m1 <- lm(
  fake_theta ~ negotiation_stage_two * image_lag6m_scaled +
    part_of_speech + gov_eu_supporter +
    gov_lr_cmp_static_scaled + eu_receipts_gdp_scaled +
    budget_any + unanimity_any +
    unemployment_scaled + inflation_scaled + north_south +
    Council_Config_final,
  data = data.sum_a6
)

# This is Table K6
stargazer::stargazer(
  m1, m1, m1,
  m1, m1, m1,
  # type = "text",
  dep.var.labels = "Topic Prevalence",
  column.labels = c(
    "Topic 12", "Topic 13", "Topic 22",
    "Topic 32", "Topic 38", "Topic 40"
  ),
  covariate.labels = c(
    "Intercept", "Other Debates", "Public Image of the EU",
    "Middle Part of Speech", "End Part of Speech", "Eurosceptic Government",
    "Government Left-Right position",
    "Net receipts from EU budget",
    "Budget issue", "Unanimity Required", "Unemployment Rate", "Inflation Rate",
    "Northern Europe", "Southern Europe",
    "Council configuration: Ecofin", "Council configuration: EPSCO",
    "Council configuration: ENV", "Council configuration: JHA",
    "Other Debates x Public Image of the EU"
  ),
  # Column 1 of each summary table holds the estimates, column 2 the
  # standard errors.
  coef = lapply(
    list(mod_top_12, mod_top_13, mod_top_22, mod_top_32, mod_top_38, mod_top_40),
    function(mod) mod$tables[[1]][, 1]
  ),
  se = lapply(
    list(mod_top_12, mod_top_13, mod_top_22, mod_top_32, mod_top_38, mod_top_40),
    function(mod) mod$tables[[1]][, 2]
  ),
  omit.stat = c("f", "ll", "rsq", "adj.rsq", "ser"),
  intercept.bottom = FALSE
)

rm(m1)

# This is Table K7
# Public-image effect by negotiation stage for each topic. The moderator in
# this model is the (two-category) negotiation stage, so the column is labelled
# accordingly. `include.rownames` is spelled out in full instead of relying on
# partial argument matching.
results_all_a6 %>%
  select(-upper, -lower) %>%
  dplyr::rename(
    "Negotiation Stage" = "moderator_level",
    "Topic Label" = "Topic_name",
    "t-value" = "tval",
    "p-value" = "p"
  ) %>%
  xtable::xtable(digits = 5) %>%
  print(include.rownames = FALSE)

# Coefficient plot of the public-image effect for one negotiation stage.
# Rows of `results_all_a6` are restricted to `stage_label`; topics are grouped
# into facet rows by the hypothesis they are assigned to below.
plot_image_effect_a6 <- function(stage_label) {
  results_all_a6 %>%
    dplyr::rename(
      "term" = "Topic_name",
      "model" = "moderator_level",
      "estimate" = "Estimate",
      "std.error" = "Std.Error"
    ) %>%
    mutate(
      feature = case_when(
        term %in% c("Supporting the compromise") ~ "Hypothesis 1",
        term %in% c(
          "Delaying agreement",
          "More technical-level discussion needed",
          "Cautious language"
        ) ~ "Hypothesis 2",
        term %in% c("Formulating a demand", "Raising a concern") ~ "Hypothesis 3"
      )
    ) %>%
    mutate(feature = as.factor(feature)) %>%
    filter(model == .env$stage_label) %>%
    ggplot(
      aes(x = estimate, xmin = lower, xmax = upper, y = term, color = model)
    ) +
    geom_point() +
    geom_errorbarh(height = 0) +
    facet_grid(feature ~ model, scales = "free_y", space = "free") +
    theme_bw() +
    geom_vline(xintercept = 0, lty = "dashed") +
    scale_color_manual(values = c("black", "black")) +
    xlab("Effect of public image on expected topic proportion") +
    ylab("") +
    ggtitle("") +
    theme_interaction
}

# One panel per negotiation stage.
political_agreement_a6 <- plot_image_effect_a6("Debates on Political Agreement")
other_debates_a6 <- plot_image_effect_a6("Other Debates")

# This is Figure K5
ggsave(
  filename = "figures_appendix/figure_k_5.eps",
  plot = ggpubr::ggarrange(
    political_agreement_a6, other_debates_a6,
    ncol = 1, nrow = 2
  ),
  width = 6, height = 6, units = "in"
)

#######################################-
#### Alternative 7: Length of Debate ####
#######################################-
# Alternative 7 data: start from the main DFM and keep only the document
# variables needed, including the word counts used to measure debate length.
data_dfm_a7 <- data_dfm
docvars(data_dfm_a7) <- docvars(data_dfm_a7) %>%
  data.frame() %>%
  select(
    gov_eu_supporter, image_lag6m_scaled, date_correct, Transcription,
    distance_elect_planned,
    part_of_speech, text_copy, text_original, Actor,
    gov_lr_cmp_static_scaled, eu_receipts_gdp_scaled, budget_any, unanimity_any,
    unemployment_scaled, inflation_scaled, north_south, number_words,
    Negotiation_Stage, Council_Config_final
  )

# Keep only documents without missing values in any retained variable.
data_dfm_a7 <- dfm_subset(data_dfm_a7, complete.cases(docvars(data_dfm_a7)))
data.sum <- docvars(data_dfm_a7)

# Length of each debate: summed word counts per transcription and date,
# divided by 3.
# NOTE(review): the division by 3 presumably corrects for each speech being
# counted once per part of speech - confirm how number_words is constructed.
debate_lengths <- data.sum %>%
  group_by(Transcription, date_correct) %>%
  summarise(length_debate = sum(number_words) / 3, .groups = "drop")

# Descriptives of debate length.
# NOTE(review): `to_exclude` reports the 10% quantile, whereas short debates
# are defined by the 25% quantile further down - confirm this is intended.
debate_lengths %>%
  summarise(
    mean_debates = mean(length_debate),
    sd_debates = sd(length_debate),
    to_exclude = quantile(length_debate, 0.1)
  )

# Debate length over time with a smoothed trend (no confidence band).
p.deb.length <- debate_lengths %>%
  ggplot(aes(x = date_correct, y = length_debate)) +
  geom_point() +
  geom_smooth(se = FALSE) +
  theme_bw() +
  labs(x = "", y = "Debate length in words")

# This is Figure K6
ggsave(
  filename = "figures_appendix/figure_k_6.eps",
  plot = p.deb.length,
  width = 7, height = 3.5, units = "in"
)

# Attach the debate length to every document and flag debates shorter than the
# 25% quantile of debate lengths. The join key is stated explicitly rather
# than inferred from the shared column name.
docvars(data_dfm_a7) <- docvars(data_dfm_a7) %>%
  data.frame() %>%
  left_join(
    debate_lengths %>% select(Transcription, length_debate),
    by = "Transcription"
  ) %>%
  mutate(
    short_debate = ifelse(
      length_debate < quantile(debate_lengths$length_debate, 0.25),
      "Yes",
      "No"
    )
  )

# Fit the 40-topic STM with the short debate x public image interaction, or
# reload the cached fit when the setting is not "yes".
if (run_a7_length_stm == "yes") {
  set.seed(1711)
  stm_out_a7 <- stm(
    documents = data_dfm_a7,
    K = 40,
    prevalence = ~ short_debate * image_lag6m_scaled +
      part_of_speech + gov_eu_supporter +
      gov_lr_cmp_static_scaled + eu_receipts_gdp_scaled +
      budget_any + unanimity_any +
      unemployment_scaled + inflation_scaled + north_south +
      Negotiation_Stage + Council_Config_final,
    data = docvars(data_dfm_a7),
    init.type = "Spectral"
  )
  save(stm_out_a7, file = "generated_data/stm_out_a7.RData")
} else {
  load(file = "generated_data/stm_out_a7.RData")
}

#### ...FREX Similarity ####
# Compare the top-20 FREX words of every main-model topic with those of the
# Alternative 7 model, to see which Alternative 7 topic matches each
# main-model topic of interest.

# Main model: collapse the FREX words stored in columns 3 to 22 into one
# string per topic. Pasting column-wise is vectorised and, unlike a
# `1:nrow()` loop, also behaves for a zero-row table.
frex_main <- read.csv("generated_data/frex_words_40_main_model.csv")
frex_main$complete <- do.call(
  paste,
  c(unname(as.list(frex_main[, 3:22])), sep = " ")
)

# Alternative 7 model: same representation, built from labelTopics().
frex_a7 <- stm::labelTopics(stm_out_a7, n = 20)[["frex"]] %>%
  data.frame() %>%
  mutate(X = 1:40)
frex_a7$complete <- do.call(
  paste,
  c(unname(as.list(frex_a7[, 1:20])), sep = " ")
)

# Stack both models: rows 1-40 are the main model, rows 41-80 Alternative 7.
all_words_a7 <- bind_rows(
  frex_main %>%
    select(X, complete) %>%
    mutate(model = "main"),
  frex_a7 %>%
    select(X, complete) %>%
    mutate(model = "length")
)

# Cosine similarity between all pairs of FREX word lists.
dfmat_a7 <- corpus(all_words_a7, text_field = "complete") %>%
  tokens() %>%
  dfm()
tstat2_a7 <- textstat_simil(dfmat_a7, method = "cosine", margin = "documents")

# Long format: basetext indexes a main-model topic; comptext is the
# Alternative 7 topic number (NA when the compared text is a main-model topic).
tstat2_pw_a7 <- as.matrix(tstat2_a7) %>%
  reshape::melt() %>%
  filter(!is.na(value)) %>%
  filter(X1 != X2) %>%
  mutate(
    basetext = as.numeric(str_remove(X1, "text")),
    comptext = as.numeric(str_remove(X2, "text"))
  ) %>%
  mutate(robustness_test = case_when(
    comptext %in% 41:80 ~ "length"
  )) %>%
  mutate(comptext = case_when(
    robustness_test == "length" ~ comptext - 40
  )) %>%
  filter(basetext %in% 1:40) %>%
  arrange(robustness_test, comptext)

# Best-matching Alternative 7 topic for each main topic of interest.
# Main-vs-main pairs (comptext is NA) are excluded so that the maximum is
# always taken over Alternative 7 topics only.
tstat2_pw_a7 %>%
  filter(!is.na(comptext)) %>%
  filter(basetext %in% c(12, 13, 22, 32, 38, 40)) %>%
  group_by(basetext) %>%
  filter(value == max(value))

# Test: Is there a new citizen-related topic?
all_words_a7 %>%
  filter(model == "length") %>%
  filter(grepl("vote|citizen|public", complete))

#### ...estimateEffect ####

# Estimate covariate effects on the six topics of interest with
# estimateEffect_cluster() (project helper, not defined in this section), or
# reload the cached result when the setting is not "yes".
if (run_a7_length_estimateEffect == "yes") {
  prep_council_a7 <- estimateEffect_cluster(
    c(12, 13, 22, 32, 38, 40) ~
      short_debate * image_lag6m_scaled +
      part_of_speech + gov_eu_supporter +
      gov_lr_cmp_static_scaled + eu_receipts_gdp_scaled +
      budget_any + unanimity_any +
      unemployment_scaled + inflation_scaled + north_south +
      Negotiation_Stage + Council_Config_final,
    stm_out_a7,
    nsims = n_simulation,
    meta = docvars(data_dfm_a7),
    uncertainty = "Global"
  )

  save(prep_council_a7, file = "generated_data/prep_council_a7.RData")
} else {
  load(file = "generated_data/prep_council_a7.RData")
}

#### ...regressions ####

# Summarise the estimated effects topic by topic (first element of each
# summary), or reload the cached summaries when the setting is not "yes".
if (run_a7_length_regressions == "yes") {
  mod_top_12 <- summary.estimateEffect_comb(prep_council_a7, topics = 12)[[1]]
  mod_top_13 <- summary.estimateEffect_comb(prep_council_a7, topics = 13)[[1]]
  mod_top_22 <- summary.estimateEffect_comb(prep_council_a7, topics = 22)[[1]]
  mod_top_32 <- summary.estimateEffect_comb(prep_council_a7, topics = 32)[[1]]
  mod_top_38 <- summary.estimateEffect_comb(prep_council_a7, topics = 38)[[1]]
  mod_top_40 <- summary.estimateEffect_comb(prep_council_a7, topics = 40)[[1]]
  save(
    mod_top_12, mod_top_13, mod_top_22,
    mod_top_32, mod_top_38, mod_top_40,
    file = "generated_data/regression_results_a7.RData"
  )
} else {
  print("Loading saved regression interaction")
  load(file = "generated_data/regression_results_a7.RData")
}

# Placeholder model for stargazer: an lm on a random outcome with the same
# right-hand side as the STM regressions. It only provides the table layout;
# the coefficients and standard errors shown are supplied via `coef`/`se`.
data.sum_a7 <- docvars(data_dfm_a7)
data.sum_a7$fake_theta <- runif(n = nrow(data.sum_a7), min = 0, max = 1)
m1 <- lm(
  fake_theta ~ short_debate * image_lag6m_scaled +
    part_of_speech + gov_eu_supporter +
    gov_lr_cmp_static_scaled + eu_receipts_gdp_scaled +
    budget_any + unanimity_any +
    unemployment_scaled + inflation_scaled + north_south +
    Negotiation_Stage + Council_Config_final,
  data = data.sum_a7
)

# This is Table K8
stargazer::stargazer(
  m1, m1, m1,
  m1, m1, m1,
  #    type = "text",
  dep.var.labels = "Topic Prevalence",
  column.labels = c(
    "Topic 12", "Topic 13", "Topic 22",
    "Topic 32", "Topic 38", "Topic 40"
  ),
  covariate.labels = c(
    "Intercept", "Short Debate", "Public Image of the EU",
    "Middle Part of Speech", "End Part of Speech", "Eurosceptic Government",
    "Government Left-Right position",
    "Net receipts from EU budget",
    "Budget issue", "Unanimity Required", "Unemployment Rate", "Inflation Rate",
    "Northern Europe", "Southern Europe",
    "Negotiation stage: Initial Presentation",
    "Negotiation stage: Mixed Negotiations",
    "Negotiation stage: Policy Debates",
    "Council configuration: Ecofin", "Council configuration: EPSCO",
    "Council configuration: ENV", "Council configuration: JHA",
    "Short Debate x Public Image of the EU"
  ),
  # Column 1 of each summary table holds the estimates, column 2 the
  # standard errors.
  coef = lapply(
    list(mod_top_12, mod_top_13, mod_top_22, mod_top_32, mod_top_38, mod_top_40),
    function(mod) mod$tables[[1]][, 1]
  ),
  se = lapply(
    list(mod_top_12, mod_top_13, mod_top_22, mod_top_32, mod_top_38, mod_top_40),
    function(mod) mod$tables[[1]][, 2]
  ),
  omit.stat = c("f", "ll", "rsq", "adj.rsq", "ser"),
  intercept.bottom = FALSE
)
rm(m1)

#######################################-
#### Alternative 8: Size of Country Model ####
#######################################-
# Copy the main dfm and reduce its document variables to the columns used by
# the size-of-country model (plus the text/identifier columns).
data_dfm_a8 <- data_dfm

docvars_a8 <- data.frame(docvars(data_dfm_a8))
docvars_a8 <- select(
  docvars_a8,
  gov_eu_supporter, image_lag6m_scaled, date_correct, Transcription, large_small,
  part_of_speech, text_copy, text_original, Actor,
  gov_lr_cmp_static_scaled, eu_receipts_gdp_scaled, budget_any, unanimity_any,
  unemployment_scaled, inflation_scaled, north_south,
  Negotiation_Stage, Council_Config_final
)
docvars(data_dfm_a8) <- docvars_a8

# Keep only documents with no missing value in any of the retained variables.
rows_complete_a8 <- complete.cases(docvars(data_dfm_a8))
data_dfm_a8 <- dfm_subset(data_dfm_a8, rows_complete_a8)
rm(docvars_a8, rows_complete_a8)

# Fit the size-of-country STM, or load a previously saved fit.
# The "yes" branch estimates a 40-topic model whose prevalence formula
# interacts large_small with image_lag6m_scaled and adds the control
# variables; the fit is written to generated_data/stm_out_a8.RData.
# Any other setting loads stm_out_a8 from that file.
if(run_a8_size_stm == "yes"){
  # Fixed seed set immediately before fitting.
  set.seed(1711)
  stm_out_a8 <- stm(documents = data_dfm_a8,
                    K=40,
                    prevalence=~ large_small*image_lag6m_scaled+
                      part_of_speech+gov_eu_supporter+
                      gov_lr_cmp_static_scaled+eu_receipts_gdp_scaled+budget_any+unanimity_any+
                      unemployment_scaled+inflation_scaled+north_south+
                      Negotiation_Stage+Council_Config_final,
                    data = docvars(data_dfm_a8),
                    init.type = "Spectral")
  save(file="generated_data/stm_out_a8.RData",stm_out_a8)
}else{
  # Expects the file to contain an object named stm_out_a8 (used below).
  load(file="generated_data/stm_out_a8.RData")
}

#### ...FREX Similarity ####
# Build one space-separated string of the 20 FREX words per topic, for the
# main model (read from CSV; words are in columns 3 to 22) and for the
# size-of-country model (words are in columns 1 to 20).
#
# The strings are built column-wise with a vectorised paste() instead of a
# 1:nrow() loop over one-row data frames. This gives the same strings for
# character columns, does not depend on how paste() coerces a data-frame row,
# and is safe when a table has zero rows.
frex_main <- read.csv("generated_data/frex_words_40_main_model.csv")
frex_main$complete <- do.call(
  paste,
  c(unname(as.list(frex_main[, 3:22])), sep = " ")
)

frex_a8 <- stm::labelTopics(stm_out_a8, n = 20)[["frex"]] %>%
  data.frame() %>%
  mutate(X = 1:40)
frex_a8$complete <- do.call(
  paste,
  c(unname(as.list(frex_a8[, 1:20])), sep = " ")
)

# Stack the FREX strings of both models (main model rows first, then the
# size-of-country model rows) and tag each row with its model.
all_words_a8 <- bind_rows(
  mutate(select(frex_main, X, complete), model = "main"),
  mutate(select(frex_a8, X, complete), model = "size")
)

# Document-feature matrix over the FREX strings, one document per topic row.
dfmat_a8 <- dfm(tokens(corpus(all_words_a8, text_field = "complete")))

# Cosine similarity between every pair of documents.
tstat2_a8 <- textstat_simil(
  dfmat_a8,
  method = "cosine",
  margin = "documents"
)

# Long table of pairwise cosine similarities between main-model topics
# (documents 1-40, `basetext`) and size-model topics (documents 41-80,
# re-indexed to 1-40 in `comptext`).
tstat2_pw_a8 <- as.matrix(tstat2_a8) %>%
  reshape::melt() %>%
  filter(!is.na(value)) %>%
  filter(X1 != X2) %>%
  mutate(basetext = as.numeric(str_remove(X1, "text")),
         comptext = as.numeric(str_remove(X2, "text"))) %>%
  mutate(robustness_test = case_when(
    comptext %in% 41:80 ~ "size")) %>%
  # case_when() has no default, so pairs whose comparison document is also a
  # main-model topic (1-40) are NA here. Drop them: otherwise they stay in the
  # table and can be picked as the "best match" below even though they do not
  # belong to the size model.
  filter(!is.na(robustness_test)) %>%
  mutate(comptext = comptext - 40) %>%
  filter(basetext %in% 1:40) %>%
  arrange(robustness_test, comptext)

# For each topic of interest in the main model, show the most similar topic
# of the size model. `basetext` is numeric, so compare against numbers.
tstat2_pw_a8 %>%
  filter(basetext %in% c(12, 13, 22, 32, 38, 40)) %>%
  group_by(basetext) %>%
  filter(value == max(value)) %>%
  ungroup()

# Test: Is there a new citizen-related topic?
# Prints the size-model rows whose FREX word string matches any of the
# patterns "vote", "citizen" or "public".
filter(
  all_words_a8,
  model == "size",
  grepl("vote|citizen|public", complete)
)

#### ...estimateEffect ####

# Estimate covariate effects for all 40 topics of the size-of-country model,
# or load a previously saved result.
# estimateEffect_cluster() is not defined in this part of the file
# (presumably it comes from 99_functions.R -- confirm). It is called with the
# same right-hand side as the prevalence formula of stm_out_a8, with
# n_simulation draws and uncertainty = "Global".
if(run_a8_size_estimateEffect == "yes"){
  prep_council_a8 <- estimateEffect_cluster(1:40 ~ large_small*image_lag6m_scaled+
                                              part_of_speech+gov_eu_supporter+
                                              gov_lr_cmp_static_scaled+eu_receipts_gdp_scaled+budget_any+unanimity_any+
                                              unemployment_scaled+inflation_scaled+north_south+
                                              Negotiation_Stage+Council_Config_final,
                                            stm_out_a8,
                                            nsims = n_simulation,
                                            meta = docvars(data_dfm_a8), 
                                            uncertainty = "Global")
  
  # Cache the result so later runs can skip this step.
  save(file="generated_data/prep_council_a8.RData",prep_council_a8)
}else{
  # Expects the file to contain an object named prep_council_a8 (used below).
  load(file="generated_data/prep_council_a8.RData")
}

#### ...regressions ####

# Regression summaries for the size-of-country model: either recompute them
# from prep_council_a8 and cache the results, or load the cached objects.
#
# Objects produced (and saved) by the "yes" branch:
#   all_reg_a8      - per-topic estimate, standard error and 95% interval of
#                     the "large_smallSmall" coefficient for all 40 topics
#   mod_top_*       - summary objects for topics 12, 13, 22, 32, 38 and 40
#   results_all_a8  - effect of image_lag6m_scaled by member-state size for
#                     those six topics
if (run_a8_size_regressions == "yes") {
  mod_top_all_a8 <- summary.estimateEffect_comb(prep_council_a8, topics = 1:40)[[1]]

  all_reg_a8_save <- mod_top_all_a8$tables

  # Pull one cell (row = coefficient name, column = statistic) out of every
  # topic's coefficient table. vapply() guarantees exactly one numeric value
  # per topic and replaces the former 1:nrow() loop, which rebuilt a data
  # frame and filtered it twice per iteration.
  extract_size_cell <- function(column) {
    vapply(
      all_reg_a8_save,
      function(tab) as.data.frame(tab)["large_smallSmall", column],
      numeric(1),
      USE.NAMES = FALSE
    )
  }

  all_reg_a8 <- data.frame(
    topic = seq_along(all_reg_a8_save),
    est = extract_size_cell("Estimate"),
    se = extract_size_cell("Std. Error")
  )
  rm(extract_size_cell)

  # 95% confidence interval under a normal approximation.
  all_reg_a8$lower <- all_reg_a8$est - 1.96 * all_reg_a8$se
  all_reg_a8$upper <- all_reg_a8$est + 1.96 * all_reg_a8$se

  mod_top_12 <- summary.estimateEffect_comb(prep_council_a8, topics = 12)[[1]]
  mod_top_13 <- summary.estimateEffect_comb(prep_council_a8, topics = 13)[[1]]
  mod_top_22 <- summary.estimateEffect_comb(prep_council_a8, topics = 22)[[1]]
  mod_top_32 <- summary.estimateEffect_comb(prep_council_a8, topics = 32)[[1]]
  mod_top_38 <- summary.estimateEffect_comb(prep_council_a8, topics = 38)[[1]]
  mod_top_40 <- summary.estimateEffect_comb(prep_council_a8, topics = 40)[[1]]

  # NOTE(review): data.sum is not referenced again in this block; presumably
  # calc_interaction_regression_stm() reads it from the global environment.
  # Kept as in the original -- confirm before removing.
  data.sum <- docvars(data_dfm_a8)

  # Regression Topic 12
  tab_12_a8 <- calc_interaction_regression_stm(var_1 = "image_lag6m_scaled",
                                               var_mod = "large_small",
                                               topic = 12,
                                               estimate_object = prep_council_a8)

  # Regression Topic 13
  tab_13_a8 <- calc_interaction_regression_stm(var_1 = "image_lag6m_scaled",
                                               var_mod = "large_small",
                                               topic = 13,
                                               estimate_object = prep_council_a8)

  # Regression Topic 22
  tab_22_a8 <- calc_interaction_regression_stm(var_1 = "image_lag6m_scaled",
                                               var_mod = "large_small",
                                               topic = 22,
                                               estimate_object = prep_council_a8)

  # Regression Topic 32
  tab_32_a8 <- calc_interaction_regression_stm(var_1 = "image_lag6m_scaled",
                                               var_mod = "large_small",
                                               topic = 32,
                                               estimate_object = prep_council_a8)

  # Regression Topic 38
  tab_38_a8 <- calc_interaction_regression_stm(var_1 = "image_lag6m_scaled",
                                               var_mod = "large_small",
                                               topic = 38,
                                               estimate_object = prep_council_a8)

  # Regression Topic 40
  tab_40_a8 <- calc_interaction_regression_stm(var_1 = "image_lag6m_scaled",
                                               var_mod = "large_small",
                                               topic = 40,
                                               estimate_object = prep_council_a8)

  # Topic names are assigned by position: the rep(..., 2) calls require that
  # each tab_*_a8 contributes exactly two rows, in the order bound here.
  results_all_a8 <- bind_rows(tab_12_a8, tab_13_a8, tab_22_a8,
                              tab_32_a8, tab_38_a8, tab_40_a8) %>%
    mutate(Topic_name = c(rep("Delaying agreement", 2),
                          rep("Formulating a demand", 2),
                          rep("Supporting the compromise", 2),
                          rep("More technical-level discussion needed", 2),
                          rep("Cautious language", 2),
                          rep("Raising a concern", 2))) %>%
    mutate(upper = Estimate + 1.96 * Std.Error,
           lower = Estimate - 1.96 * Std.Error) %>%
    mutate(moderator_level = case_when(
      moderator_level == "large_smallSmall:image_lag6m_scaled" ~ "Small member states",
      moderator_level == "baseline" ~ "Large member states"
    ))

  save(file = "generated_data/regression_results_a8.RData",
       all_reg_a8,
       mod_top_12, mod_top_13, mod_top_22,
       mod_top_32, mod_top_38, mod_top_40,
       results_all_a8)
} else {
  print("Loading saved regression interaction")
  load(file = "generated_data/regression_results_a8.RData")
}

# stargazer needs fitted model objects, so a placeholder lm is estimated on a
# random outcome with the same right-hand side as the a8 specification. The
# coefficients and standard errors it reports are then replaced by the values
# stored in the mod_top_* summary tables.
data.sum_a8 <- docvars(data_dfm_a8)
data.sum_a8$fake_theta <- runif(n = nrow(data.sum_a8), min = 0, max = 1)

m1 <- lm(
  fake_theta ~ large_small * image_lag6m_scaled +
    part_of_speech + gov_eu_supporter +
    gov_lr_cmp_static_scaled + eu_receipts_gdp_scaled + budget_any + unanimity_any +
    unemployment_scaled + inflation_scaled + north_south +
    Negotiation_Stage + Council_Config_final,
  data = data.sum_a8
)

# One summary object per table column, in column order.
models_k9 <- list(mod_top_12, mod_top_13, mod_top_22,
                  mod_top_32, mod_top_38, mod_top_40)

# This is Table K9
stargazer::stargazer(
  m1, m1, m1, m1, m1, m1,
  #type = "text",
  dep.var.labels = "Topic Prevalence",
  column.labels = c("Topic 12", "Topic 13", "Topic 22",
                    "Topic 32", "Topic 38", "Topic 40"),
  covariate.labels = c(
    "Intercept", "Small Member States", "Public Image of the EU",
    "Middle Part of Speech", "End Part of Speech",
    "Eurosceptic Government", "Government Left-Right position",
    "Net receipts from EU budget",
    "Budget issue", "Unanimity Required", "Unemployment Rate", "Inflation Rate",
    "Northern Europe", "Southern Europe",
    "Negotiation stage: Initial Presentation",
    "Negotiation stage: Mixed Negotiations",
    "Negotiation stage: Policy Debates",
    "Council configuration: Ecofin", "Council configuration: EPSCO",
    "Council configuration: ENV", "Council configuration: JHA",
    "Small Member States x Public Image of the EU"
  ),
  # Column 1 of each summary table holds the estimates, column 2 the
  # standard errors.
  coef = lapply(models_k9, function(mod) mod$tables[[1]][, 1]),
  se = lapply(models_k9, function(mod) mod$tables[[1]][, 2]),
  omit.stat = c("f", "ll", "rsq", "adj.rsq", "ser"),
  intercept.bottom = FALSE
)
rm(m1, models_k9)

# Coefficient plot of the effect of public image on the six topics of
# interest, for one group of member states.
#
# results:     results_all_a8-style data frame with columns Topic_name,
#              moderator_level, Estimate, Std.Error, lower and upper.
# group_label: value of moderator_level to plot ("Small member states" or
#              "Large member states").
# Returns a ggplot object with topics on the y axis, faceted by hypothesis.
#
# The two panels of Figure K7 were previously built by two copies of this
# pipeline that differed only in the filter value.
plot_a8_interaction <- function(results, group_label) {
  results %>%
    dplyr::rename("term" = "Topic_name",
                  "model" = "moderator_level",
                  "estimate" = "Estimate",
                  "std.error" = "Std.Error") %>%
    mutate(feature = case_when(
      term %in% c("Supporting the compromise") ~ "Hypothesis 1",
      term %in% c("Delaying agreement",
                  "More technical-level discussion needed",
                  "Cautious language") ~ "Hypothesis 2",
      term %in% c("Formulating a demand", "Raising a concern") ~ "Hypothesis 3"
    )) %>%
    mutate(feature = as.factor(feature)) %>%
    filter(model == group_label) %>%
    ggplot(aes(x = estimate, xmin = lower, xmax = upper, y = term, color = model)) +
    geom_point() +
    geom_errorbarh(height = 0) +
    facet_grid(feature ~ model, scales = "free_y", space = "free") +
    theme_bw() +
    geom_vline(xintercept = 0, lty = "dashed") +
    scale_color_manual(values = c("black", "black")) +
    xlab("Effect of public image on expected topic proportion") +
    ylab("") +
    ggtitle("") +
    theme_interaction
}

small_states_a8 <- plot_a8_interaction(results_all_a8, "Small member states")
large_states_a8 <- plot_a8_interaction(results_all_a8, "Large member states")

# This is Figure K7
ggsave(filename = "figures_appendix/figure_k_7.eps",
       plot = ggpubr::ggarrange(small_states_a8, large_states_a8, ncol = 1, nrow = 2),
       width = 6, height = 6, units = "in")

# Human-readable labels for the 40 topics; element i is the label of topic i.
topic_labels_a8 <- c(
  "Topic 1: Thanking I (rather specific person)",
  "Topic 2: Exchange of information",
  "Topic 3: Internal market",
  "Topic 4: Ships",
  "Topic 5: Reflection",
  "Topic 6: Thanking II (rather abstract groups)",
  "Topic 7: European judicial matters / \nEuropean public prosecutor",
  "Topic 8: Referring to Commissioner",
  "Topic 9: Banking supervision",
  "Topic 10: Burdens of implementation \n(esp. environment)",
  "Topic 11: Renewable energy and climate",
  "Topic 12: Delaying agreement",
  "Topic 13: Formulating a demand",
  "Topic 14: Invasive species",
  "Topic 15: GMOs",
  "Topic 16: Crime",
  "Topic 17: Budget",
  "Topic 18: Talking about legal text",
  "Topic 19: International money laundering",
  "Topic 20: Governance of four freedoms \n(e.g. capital, workers)",
  "Topic 21: Health and medical devices",
  "Topic 22: Supporting the compromise",
  "Topic 23: Congratulating",
  "Topic 24: Legal harmonization",
  "Topic 25: Cooperation between member states",
  "Topic 26: Air emissions and pollutants",
  "Topic 27: Tax evasion and fraud",
  "Topic 28: Banking union (esp. \nresolution and deposit insurance)",
  "Topic 29: Data protection",
  "Topic 30: Public procurement and tobacco",
  "Topic 31: Financial crisis",
  "Topic 32: More technical-level discussion needed",
  "Topic 33: Research and development \n(e.g. Horizon 2020)",
  "Topic 34: Audit and control",
  "Topic 35: Talking about reaching compromise",
  "Topic 36: Negotiations with EP",
  "Topic 37: Brief intervention",
  "Topic 38: Cautious language",
  "Topic 39: Affirmation",
  "Topic 40: Raising a concern"
)

# Map each topic number to its label by position (topics outside 1-40 get NA).
all_reg_a8$topic_name <- topic_labels_a8[
  match(all_reg_a8$topic, seq_along(topic_labels_a8))
]
rm(topic_labels_a8)

# Coefficient plot of the direct effect of being a small member state on the
# expected proportion of each of the 40 topics, with topic 1 at the top.
#
# Styling uses the shared theme_interaction object defined at the top of the
# script (as the Figure K7 panels do) instead of an inline copy of the same
# theme() call. The former scale_color_manual() layer is dropped: no colour
# aesthetic is mapped in this plot, so it had no effect.
size_direct_plot <- all_reg_a8 %>%
  ggplot(aes(x = est, xmin = lower, xmax = upper,
             y = reorder(topic_name, -topic))) +
  geom_point() +
  geom_errorbarh(height = 0) +
  theme_bw() +
  geom_vline(xintercept = 0, lty = "dashed") +
  xlab("Effect of small member state on expected topic proportion") +
  ylab("") +
  ggtitle("") +
  theme_interaction

# This is Figure K8
ggsave(filename = "figures_appendix/figure_k_8.eps",
       plot = size_direct_plot,
       width = 6, height = 7.5, units = "in")

#### END ####

